## This script creates an IGV file of coordinated read counts
## Argument 1: TA site file
## Argument 2: Staph genes file
## Argument 3: Counts file from John Santa Maria
## >> python igv_staph.py Staph_TA Staph\ Genes NoDrug

import sys

## Pipeline: TA sites -> gene annotation -> read counts -> tab-separated rows.
## Each stage takes an iterable of text lines, so it works on an open file
## as well as on a plain list of strings.


def read_ta_sites(lines):
    """Build the TA-site table from the lines of the TA site file.

    A line is used when its first whitespace-separated field is 'TA'; its
    fifth field is then read as the integer site position.  Every other
    line, including a blank one, is ignored.

    Returns a dict mapping position -> [0].  The single list item is the
    read-count accumulator; gene names are appended after it later.
    """
    sites = {}
    for line in lines:
        fields = line.split()
        # `fields` is empty for a blank line, so test it before indexing.
        if fields and fields[0] == 'TA':
            sites[int(fields[4])] = [0]
    return sites


def annotate_genes(sites, lines):
    """Append gene names to the TA sites that fall inside each gene.

    A line is used when its first tab-separated field starts with 'SAO'.
    That field is the gene name; the second and third fields are the
    integer start and end.  The name is appended to every site in
    range(start, end), so the end position itself is not included.

    `sites` is modified in place.
    """
    for line in lines:
        fields = line.split('\t')
        if fields[0][0:3] != 'SAO':
            continue
        gene = fields[0]
        start = int(fields[1])
        end = int(fields[2])
        for position in range(start, end):
            if position in sites:
                sites[position].append(gene)


def add_read_counts(sites, lines):
    """Add the read counts from the counts file to the matching TA sites.

    A line is used when its second tab-separated field consists only of
    digits; that field is the position.  The fifth field is added to the
    site at position - 2 and the sixth field to the site at position + 1,
    whenever that site exists.  Lines with fewer than two fields (for
    example blank lines) and lines whose second field is not numeric are
    skipped.

    `sites` is modified in place.
    """
    # (column index, offset from the reported position to the TA site)
    column_offsets = ((4, -2), (5, 1))
    for line in lines:
        fields = line.split('\t')
        if len(fields) < 2 or not fields[1].isdigit():
            continue
        position = int(fields[1])
        for column, offset in column_offsets:
            count = int(fields[column])
            site = position + offset
            if count != 0 and site in sites:
                sites[site][0] += count


def format_rows(sites):
    """Yield one output line per TA site, in increasing position order.

    Columns are 'SAOHSC', position, position + 2, read count, and the
    first gene recorded for the site ('intergenic' when there is none).
    """
    for position in sorted(sites):
        entry = sites[position]
        label = entry[1] if len(entry) > 1 else 'intergenic'
        # The space in front of every tab reproduces, byte for byte, what
        # the comma-separated Python 2 print statements used to emit.
        # NOTE(review): confirm whether downstream tools need those spaces.
        yield 'SAOHSC \t%d \t%d \t%s \t%s\n' % (
            position, position + 2, entry[0], label)


def main(argv):
    """Run the pipeline on the three input paths in argv[1:4]."""
    if len(argv) < 4:
        sys.exit('usage: %s TA_SITE_FILE GENES_FILE COUNTS_FILE' % argv[0])

    # `with` closes each input as soon as it has been consumed.
    with open(argv[1]) as handle:
        sites = read_ta_sites(handle)
    with open(argv[2]) as handle:
        annotate_genes(sites, handle)
    with open(argv[3]) as handle:
        add_read_counts(sites, handle)

    for row in format_rows(sites):
        sys.stdout.write(row)


if __name__ == '__main__':
    main(sys.argv)

# for k in keys:
#     print 'SAOHSC', '\t', int(k), '\t', int(k) + 2, '\t', TAsites[k][0],
#     if len(TAsites[k]) > 1: print '\t', TAsites[k][1]
#     else: print

